# datasets
# Load the policy-stringency data and attach two summary columns to every row:
#   avg_epsv       - the country's mean Environmental_Policy_Stringency_Value
#   total_avg_epsv - the median of avg_epsv taken over all rows, so a country
#                    counts once per row it contributes rather than once overall
# Rows are then sorted by descending avg_epsv, and by Year after that.
emissions <-
  read_csv("https://raw.githubusercontent.com/mrbarron3/group_project/main/data.csv") %>%
  group_by(Country) %>%
  mutate(avg_epsv = mean(Environmental_Policy_Stringency_Value)) %>%
  ungroup() %>%
  mutate(total_avg_epsv = median(avg_epsv)) %>%
  arrange(desc(avg_epsv), Year)

# Rows of countries whose mean stringency (avg_epsv) is strictly above the
# overall median (total_avg_epsv).
emissions_top <- filter(emissions, avg_epsv > total_avg_epsv)

# Rows of countries whose mean stringency (avg_epsv) is at or below the
# overall median (total_avg_epsv).
emissions_bottom <- filter(emissions, avg_epsv <= total_avg_epsv)

# One row per country with `n`, the number of rows that country has in
# `emissions`. count(Country) does the grouping and tallying in one step and
# returns an ungrouped tibble, so no leftover grouping leaks into later code.
emissions_groups <- emissions %>%
  count(Country)

# Per-country change in stringency between 1990 and 2020.
# A row is kept only when its Year is the country's earliest or latest year
# AND that year is 1990 or 2020. The surviving values are spread into `1990`
# and `2020` columns and subtracted; drop_na() then removes countries that lack
# either value. The result is still grouped by Country.
emissions_diff <- emissions %>%
  group_by(Country) %>%
  # range(Year) is c(min, max) within the current country
  filter(Year %in% range(Year), Year %in% c(1990, 2020)) %>%
  mutate(Year = as.character(Year)) %>%
  select(Country, Year, Environmental_Policy_Stringency_Value) %>%
  pivot_wider(
    names_from = Year,
    values_from = Environmental_Policy_Stringency_Value
  ) %>%
  mutate(diff = `2020` - `1990`) %>%
  drop_na()

# Long form of emissions_diff: one row per country and year ("2020" listed
# before "1990"), with the index value in `epsi`; `diff` is carried along.
emi_90_20 <- emissions_diff %>%
  pivot_longer(
    cols = c(`2020`, `1990`),
    names_to = "Year",
    values_to = "epsi"
  )

emi_90_20
## # A tibble: 54 × 4
## # Groups:   Country [27]
##    Country      diff Year   epsi
##    <chr>       <dbl> <chr> <dbl>
##  1 Switzerland  3    2020  4.5  
##  2 Switzerland  3    1990  1.5  
##  3 Japan        2.19 2020  3.78 
##  4 Japan        2.19 1990  1.58 
##  5 Finland      2.53 2020  4.11 
##  6 Finland      2.53 1990  1.58 
##  7 France       3.44 2020  4.89 
##  8 France       3.44 1990  1.44 
##  9 Denmark      3.31 2020  3.72 
## 10 Denmark      3.31 1990  0.417
## # … with 44 more rows
# Print the wide per-country table: the 1990 value, the 2020 value and their
# difference (`diff`).
emissions_diff
## # A tibble: 27 × 4
## # Groups:   Country [27]
##    Country     `1990` `2020`  diff
##    <chr>        <dbl>  <dbl> <dbl>
##  1 Switzerland  1.5     4.5   3   
##  2 Japan        1.58    3.78  2.19
##  3 Finland      1.58    4.11  2.53
##  4 France       1.44    4.89  3.44
##  5 Denmark      0.417   3.72  3.31
##  6 Sweden       0.694   3.83  3.14
##  7 Italy        1.61    3.72  2.11
##  8 Norway       0.472   3.94  3.47
##  9 Germany      1.44    3.47  2.03
## 10 Netherlands  1.56    3.47  1.92
## # … with 17 more rows
# Number of distinct countries in the data (emissions_groups has one row per
# Country).
nrow(emissions_groups)
## [1] 33
# line plot
# Line plot of the stringency value against Year, one coloured line (with
# small point markers) per country. The ggplot object is then converted with
# ggplotly().
# The title spelling is corrected from "Indicies" to "Indices".
line_plot <- ggplot(
  emissions,
  aes(x = Year, y = Environmental_Policy_Stringency_Value, color = Country)
) +
  geom_line() +
  geom_point(size = 0.5) +
  labs(
    y = "Environment Policy Stringency Value",
    title = "Environmental Policy Stringency Indices Over the Years"
  )

ggplotly(line_plot)
# probably too much info
# density ridges
# Ridgeline density of stringency values, one ridge per country. Countries are
# ordered by reorder() using its default summary function.
ggplot(data = emissions) +
  geom_density_ridges(
    mapping = aes(
      x = Environmental_Policy_Stringency_Value,
      y = reorder(Country, Environmental_Policy_Stringency_Value)
    )
  )
# Labels are currently disabled. To restore them, append ` +` to the call above
# and uncomment:
#  labs(y = "Country", title = "Environmental Policy Stringency Indices By Country", subtitle = "Over the Years 1990-2020 where available")
# combined box plots
# Two horizontal box-plot panels of stringency values by country, each ordered
# by the country's mean value: panel "a" is built from emissions_top and panel
# "b" from emissions_bottom. `a + b` combines them into one figure, and
# plot_annotation() adds the overall title and subtitle.
# The figure title spelling is corrected from "Indicies" to "Indices".
a <- ggplot(emissions_top) +
  geom_boxplot(
    aes(
      x = Environmental_Policy_Stringency_Value,
      y = reorder(Country, Environmental_Policy_Stringency_Value, mean)
    )
  ) +
  labs(y = "Country", title = "a")

b <- ggplot(emissions_bottom) +
  geom_boxplot(
    aes(
      x = Environmental_Policy_Stringency_Value,
      y = reorder(Country, Environmental_Policy_Stringency_Value, mean)
    )
  ) +
  labs(y = "Country", title = "b")

a + b +
  plot_annotation(
    title = "Environmental Policy Stringency Indices By Country",
    subtitle = "1990-2020"
  )

# single boxplot
# Horizontal box plot of stringency values for every country in `emissions`,
# ordered by each country's mean value.
# The title spelling is corrected from "Indicies" to "Indices".
ggplot(emissions) +
  geom_boxplot(
    aes(
      x = Environmental_Policy_Stringency_Value,
      y = reorder(Country, Environmental_Policy_Stringency_Value, mean)
    )
  ) +
  labs(
    x = "Environmental Policy Stringency Index Value",
    y = "Country",
    title = "Environmental Policy Stringency Indices By Country, 1990-2020",
    subtitle = "A Smaller Subset of Years May be Used Depending on Data Availability"
  )

# bar plot with differences between newest and oldest data
# Bar chart of `diff` (2020 value minus 1990 value) per country, with bars
# ordered by reorder(Country, diff). The y scale has no padding, so bars start
# on the axis, and the country labels are rotated 90 degrees.
ggplot(data = emissions_diff) +
  geom_col(mapping = aes(x = reorder(Country, diff), y = diff)) +
  scale_y_continuous(expand = c(0, 0)) +
  labs(
    x = "Country",
    y = "Increase Amount",
    title = "Increase Between 1990 and 2020 Environment Policy Stringency Index Value",
    subtitle = "Only Includes Countries Where Both 2020 and 1990 Data is Available"
  ) +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)
  )

# double bar plot
# Dodged bar chart: for each country, one bar per Year value in emi_90_20,
# showing `epsi`. Countries are ordered by the larger of their values, and the
# two fill colours are set by hand.
ggplot(data = emi_90_20) +
  geom_col(
    mapping = aes(x = reorder(Country, epsi, max), y = epsi, fill = Year),
    width = 0.7,
    position = "dodge"
  ) +
  scale_fill_manual(values = c("#a67abf", "#99bf7a")) +
  scale_y_continuous(expand = c(0, 0)) +
  labs(
    x = "Country",
    y = "Environmental Policy Stringency Index",
    title = "Environmental Policy Stringency Index by Country and Year",
    subtitle = "Only Includes Countries Where Both 2020 and 1990 Data is Available"
  ) +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)
  )